"""
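Scrape the chapters linked from the index page of bixuejian.5000yan.com and
save each chapter as a UTF-8 text file.

@-*- coding: utf-8 -*-
@ python: 3.12.3
@ Author: JacksonCode
@ Created: 2025/6/24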
"""
import re
from pathlib import Path
from urllib.parse import urljoin

import requests
from lxml import etree

dir_name = "xiaoshuoLib"

headers = {  # Browser-like headers sent with every request
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36'
}

main_url = 'https://bixuejian.5000yan.com/'

# Seconds to wait on a request before giving up, so a stalled server cannot
# hang the crawl indefinitely.
REQUEST_TIMEOUT = 10

# Path separators, characters reserved on common filesystems, and control
# characters; none of these may appear in an output file name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def safe_filename(title: str) -> str:
    """Return *title* turned into a name that is safe to use as a file name.

    Surrounding whitespace is removed, every path separator / reserved /
    control character is replaced with ``_``, and trailing dots and spaces
    are dropped. If nothing is left, ``'untitled'`` is returned so the caller
    always gets a non-empty name.
    """
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', title.strip()).rstrip(' .')
    return cleaned or 'untitled'


def fetch_tree(url: str):
    """Download *url* and return its HTML parsed by ``etree.HTML``.

    The response is decoded as UTF-8.

    Raises:
        requests.RequestException: on a network failure, a timeout, or a
            non-2xx HTTP status.
        ValueError: if the parser yields no document for the response body.
    """
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of saving an error page as chapter text.
    response.raise_for_status()
    response.encoding = 'utf-8'
    tree = etree.HTML(response.text)
    if tree is None:
        raise ValueError(f'no HTML document could be parsed from {url}')
    return tree


def main() -> None:
    """Crawl the index page and write every linked chapter to ``dir_name``.

    Each chapter is saved as ``<title>.txt`` containing the title on the first
    line followed by the chapter text. A chapter that cannot be downloaded is
    reported and skipped; a failure to fetch the index page propagates.
    """
    output_dir = Path(dir_name)
    # The directory must exist before any chapter file can be opened.
    output_dir.mkdir(parents=True, exist_ok=True)

    index_tree = fetch_tree(main_url)
    chapter_links = index_tree.xpath("/html/body/div[2]/div/div[1]/div[3]/ul/li/a")

    for a in chapter_links:
        # Queries below are relative to this anchor element.
        titles = a.xpath('./text()')
        hrefs = a.xpath('./@href')
        if not titles or not hrefs:
            # An anchor without text or without a target cannot be saved.
            continue

        title = titles[0]
        # Resolve against the index URL so relative links work as well.
        detail_url = urljoin(main_url, hrefs[0])

        try:
            detail_tree = fetch_tree(detail_url)
        except (requests.RequestException, ValueError) as exc:
            # One bad chapter should not abort the rest of the crawl.
            print(title, ":章节内容爬取失败！", exc)
            continue

        content_list = detail_tree.xpath('/html/body/div[2]/div/div[1]/div[3]/div[4]//text()')
        content = ''.join(content_list)

        chapter_path = output_dir / (safe_filename(title) + '.txt')
        with open(chapter_path, 'w', encoding='utf-8') as fp:
            fp.write(title + '\n' + content)
        print(title, ":章节内容爬取保存成功！")


if __name__ == "__main__":
    main()
